***********************************************
* Title: rwanda_jde_tableA8.do
* Author: Todd Pugatch
* Last update: June 4 2024
* Description: analysis for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
* Inputs: 	rwanda_school_trainattend_jde.dta 
*			headteacher_merge_jde.dta
*			teacher_merge_jde.dta
* Outputs: 	rwanda_jde_tableA8.[txt/xls/out]
* Notes: produces Table A8
************************************************

* Set environment
local start=`"$S_TIME"'
clear
clear matrix
clear mata
graph drop _all
program drop _all
cap log close
set more off

* Set directories 
*global main "[SET MAIN DIRECTORY HERE]"
	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global results "$main/03_results"
	global temp "$main/04_output"

* begin log file
log using "$temp/rwanda_jde_tableA8.txt", text replace

* PREP DATA ON PROGRAM TAKE-UP: teacher training & exchange visit attendance
/*Goals: 	1. confirm take-up<85% for exchange visit, triggering IV regressions
			2. merge take-up data with teacher and head teacher surveys for analysis*/

qui use "$cleandata/rwanda_school_trainattend_jde.dta", clear

table treatment, statistic(freq) statistic(mean training_any_pct) statistic(mean exchange_pct)

* merge take-up with teacher & head teacher surveys, by school
local takeup "training_any_pct exchange_pct"
foreach d in teacher headteacher {
	qui use "$cleandata/`d'_merge_jde.dta", clear
	if ("`d'"=="headteacher") drop _merge
	qui merge m:1 school_code using "$cleandata/rwanda_school_trainattend_jde.dta", keepusing(`takeup')
	qui drop if _merge==2
	drop _merge
	qui save "$temp/`d'temp.dta", replace
}

****************************
* FIRST PASS THROUGH: SET UP MULTIPLE HYPOTHESIS CORRECTION
/*Proceed in 3 steps:
	1. Run all regressions for H1. Save coefficients & std. errors.
	2. Calculate q-values for all hypotheses.
	3. Re-run regressions to get output, including q-values.*/
****************************

* 1. GET COEF/SE/P FOR ALL REGRESSIONS	
* TEACHER CURRICULAR IMPLEMENTATION
/*Goal: produce columns 3-5 of Table P2
	Uses teacher survey data.
	Regress curricular implementation measure on treatment status, controlling
		for baseline outcome and randomization strata (baseline outcome not applicable.
	Impute values for missing baseline outcome. Include dummy for missing in 
		regression.
	For Lee bounds, proceed in 3 steps:
		1) regress outcome on baseline and strata indicators
		2) get residuals
		3) calculate Lee bounds on these residuals
	Calculate Lee bounds first, then regression. This allows for exporting bounds
		into regression results. Note that this method does not always produce
		bounds containing the original coefficient, because treatment effect 
		from residualized regression not exactly equal to original treatment
		effect (because of potentially spurious correlation between treatment
		and covariates).*/

qui use "$temp/teachertemp.dta", clear

local Y "skillslab_sched_el lessonplan_index_el eknowledge_index_el"
local Y0 "skillslab_definition_bl lessonplan_index_bl eknowledge_index_bl"

* prep missing values for teachers without baseline outcome
foreach y0 in `Y0' {
	qui su `y0' if treatment==0 & insample_bl==1
	qui replace `y0'=r(mean) if insample_bl!=1
	qui gen `y0'm=(insample_bl!=1)
	lab var `y0'm "missing value for `y0'"
}

* regressions: Table P2, columns 3-5
local i=1
/*Column 3*/
qui ivregress 2sls skillslab_sched_el (exchange_pct=treatment) skillslab_definition_bl skillslab_definition_blm i.strata, robust
scalar define H1_c`i'b=_b[exchange_pct]
scalar define H1_c`i'se=_se[exchange_pct]
scalar define t=_b[exchange_pct]/_se[exchange_pct]
scalar define dfr=e(N)-e(df_m)
scalar define H1_c`i'p=2*ttail(dfr,abs(t))
local i=`i'+1

/*Column 4*/
qui ivregress 2sls lessonplan_index_el (exchange_pct=treatment) lessonplan_index_bl lessonplan_index_blm i.strata, robust
scalar define H1_c`i'b=_b[exchange_pct]
scalar define H1_c`i'se=_se[exchange_pct]
scalar define t=_b[exchange_pct]/_se[exchange_pct]
scalar define dfr=e(N)-e(df_m)
scalar define H1_c`i'p=2*ttail(dfr,abs(t))
local i=`i'+1

/*Column 5*/
qui ivregress 2sls eknowledge_index_el (exchange_pct=treatment) eknowledge_index_bl eknowledge_index_blm i.strata, robust
scalar define H1_c`i'b=_b[exchange_pct]
scalar define H1_c`i'se=_se[exchange_pct]
scalar define t=_b[exchange_pct]/_se[exchange_pct]
scalar define dfr=e(N)-e(df_m)
scalar define H1_c`i'p=2*ttail(dfr,abs(t))
local i=`i'+1

* HEAD TEACHER PERCEPTIONS
/*Goal: produce columns 6-7 of Table P2
	Uses teacher survey data.
	Regress curricular implementation measure on treatment status, controlling
		for baseline outcome and randomization strata (baseline outcome not applicable.
	Impute values for missing baseline outcome. Include dummy for missing in 
		regression.*/

qui use "$temp/headteachertemp.dta", clear

local Y "ht_pedagogy_active_index_el goalteach_shld_skill_el"
local Y0 "ht_pedagogy_active_index_bl skillbasedsupport_bl"

* prep missing values for head teachers without baseline outcome
foreach y0 in `Y0' {
	qui gen `y0'i=`y0'
	qui replace `y0'i=r(mean) if `y0'==.	
	qui su `y0' if treatment==0
	qui gen `y0'm=(`y0'==.)
	lab var `y0'i "`y0', imputing control mean for missing values"
	lab var `y0'm "missing value for `y0'"
}

* regressions: Table P2, columns 6-7

/*Column 6*/
qui ivregress 2sls ht_pedagogy_active_index_el (exchange_pct=treatment) ht_pedagogy_active_index_bli ht_pedagogy_active_index_blm i.strata, robust
scalar define H1_c`i'b=_b[exchange_pct]
scalar define H1_c`i'se=_se[exchange_pct]
scalar define t=_b[exchange_pct]/_se[exchange_pct]
scalar define dfr=e(N)-e(df_m)
scalar define H1_c`i'p=2*ttail(dfr,abs(t))
local i=`i'+1

/*Column 7*/
qui ivregress 2sls goalteach_shld_skill_el (exchange_pct=treatment) skillbasedsupport_bli skillbasedsupport_blm i.strata, robust
scalar define H1_c`i'b=_b[exchange_pct]
scalar define H1_c`i'se=_se[exchange_pct]
scalar define t=_b[exchange_pct]/_se[exchange_pct]
scalar define dfr=e(N)-e(df_m)
scalar define H1_c`i'p=2*ttail(dfr,abs(t))

* 2. CALCULATE Q-VALUES FOR ALL HYPOTHESES
/*Calculate sharpened q-values from Benjamini, Krieger, Yuketeli (2006).
	Use code from fdr_sharpened_qvalues.do from Anderson (2008), available at 
		https://are.berkeley.edu/~mlanderson/ARE_Website/Research.html*/
* prep data: put p-values from above into a dataset
clear
local n=`i'
qui set obs `n'
qui gen hypothesis=1
qui gen column=_n
foreach x in b se pval {
	qui gen double `x'=.
}
forval i=1/`n' {
	qui replace b=H1_c`i'b in `i'
	qui replace se=H1_c`i'se in `i'
	qui replace pval=H1_c`i'p in `i' 
}

* START OF ANDERSON CODE FROM fdr_sharpened_qvalues.do (modified)
* Collect the total number of p-values tested
quietly sum pval
local totalpvals = r(N)

* Sort the p-values in ascending order and generate a variable that codes each p-value's rank

quietly gen int original_sorting_order = _n
quietly sort pval
quietly gen int rank = _n if pval~=.

* Generate the variable that will contain the BKY (2006) sharpened q-values

qui gen bky06_qval = 1 if pval~=.
	
* Set the initial counter to 1 

local qval = 1

/* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, 
	then checks which hypotheses are rejected at q = 0.999, 
	then checks which hypotheses are rejected at q = 0.998, etc.  
	The loop ends by checking which hypotheses are rejected at q = 0.001. */
while `qval' > 0 {
	* First Stage
	* Generate the adjusted first stage q level we are testing: q' = q/1+q
	local qval_adj = `qval'/(1+`qval')
	
	* Generate value q'*r/M
	qui gen fdr_temp1 = `qval_adj'*rank/`totalpvals'
	
	* Generate binary variable checking condition p(r) <= q'*r/M
	qui gen reject_temp1 = (fdr_temp1>=pval) if pval~=.
	
	* Generate variable containing p-value ranks for all p-values that meet above condition
	qui gen reject_rank1 = reject_temp1*rank
	
	* Record the rank of the largest p-value that meets above condition
	qui egen total_rejected1 = max(reject_rank1)

	* Second Stage
	* Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q'*(M/m0)
	local qval_2st = `qval_adj'*(`totalpvals'/(`totalpvals'-total_rejected1[1]))
	
	* Generate value q_2st*r/M
	qui gen fdr_temp2 = `qval_2st'*rank/`totalpvals'
	
	* Generate binary variable checking condition p(r) <= q_2st*r/M
	qui gen reject_temp2 = (fdr_temp2>=pval) if pval~=.
	
	* Generate variable containing p-value ranks for all p-values that meet above condition
	qui gen reject_rank2 = reject_temp2*rank
	
	* Record the rank of the largest p-value that meets above condition
	qui egen total_rejected2 = max(reject_rank2)

	* A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that meets the above condition
	qui replace bky06_qval = `qval' if rank <= total_rejected2 & rank~=.
	
	* Reduce q by 0.001 and repeat loop
	drop fdr_temp* reject_temp* reject_rank* total_rejected*
	local qval = `qval' - .001
}

quietly sort original_sorting_order

******************************
* END OF ANDERSON CODE FROM fdr_sharpened_qvalues.do (modified)
******************************
* list results
list hypothesis column b se pval bky06_qval

* prep results for inclusion in final regression output
forval i=1/`n' {
	scalar define H1_c`i'q=bky06_qval in `i'
}

* 3. RE-RUN REGRESSIONS TO GET OUTPUT, INCLUDING Q-VALUES	
* TEACHER CURRICULAR IMPLEMENTATION
/*Goal: produce columns 3-5 of Table P2
	Uses teacher survey data.
	Regress curricular implementation measure on treatment status, controlling
		for baseline outcome and randomization strata (baseline outcome not applicable.
	Impute values for missing baseline outcome. Include dummy for missing in 
		regression.
	Omit Lee bounds.
	Include sharpened q-values calculated above.*/

qui use "$temp/teachertemp.dta", clear

local Y "skillslab_sched_el lessonplan_index_el eknowledge_index_el"
local Y0 "skillslab_definition_bl lessonplan_index_bl eknowledge_index_bl"

* prep missing values for teachers without baseline outcome
foreach y0 in `Y0' {
	qui su `y0' if treatment==0 & insample_bl==1
	qui replace `y0'=r(mean) if insample_bl!=1
	qui gen `y0'm=(insample_bl!=1)
	lab var `y0'm "missing value for `y0'"
}

* regressions: Table P2, columns 3-5
local stat ""1st stage F",F,"control mean",mu_c,"baseline mean",mu_0,"q-value",qv"
local i=1

/*Column 3*/
qui ivregress 2sls skillslab_sched_el (exchange_pct=treatment) skillslab_definition_bl skillslab_definition_blm i.strata, robust
qui estat first
matrix define X=r(singleresults)
scalar define F=X[1,4]
qui su skillslab_sched_el if treatment==0 & e(sample)
scalar define mu_c=r(mean)
qui su skillslab_definition_bl if e(sample)
scalar define mu_0=r(mean)
scalar define qv=H1_c`i'q
outreg2 exchange_pct using "$results/rwanda_jde_tableA8.xls", se excel nolabel nocons addstat(`stat') replace
local i=`i'+1

/*Column 4*/
qui ivregress 2sls lessonplan_index_el (exchange_pct=treatment) lessonplan_index_bl lessonplan_index_blm i.strata, robust
qui estat first
matrix define X=r(singleresults)
scalar define F=X[1,4]
qui su lessonplan_index_el if treatment==0 & e(sample)
scalar define mu_c=r(mean)
qui su lessonplan_index_bl if e(sample)
scalar define mu_0=r(mean)
scalar define qv=H1_c`i'q
outreg2 exchange_pct using "$results/rwanda_jde_tableA8.xls", se excel nolabel nocons addstat(`stat') append
local i=`i'+1

/*Column 5*/
qui ivregress 2sls eknowledge_index_el (exchange_pct=treatment) eknowledge_index_bl eknowledge_index_blm i.strata, robust
qui estat first
matrix define X=r(singleresults)
scalar define F=X[1,4]
qui su eknowledge_index_el if treatment==0 & e(sample)
scalar define mu_c=r(mean)
qui su eknowledge_index_bl if e(sample)
scalar define mu_0=r(mean)
scalar define qv=H1_c`i'q
outreg2 exchange_pct using "$results/rwanda_jde_tableA8.xls", se excel nolabel nocons addstat(`stat') append
local i=`i'+1

* HEAD TEACHER PERCEPTIONS
/*Goal: produce columns 6-7 of Table P2
	Uses teacher survey data.
	Regress curricular implementation measure on treatment status, controlling
		for baseline outcome and randomization strata (baseline outcome not applicable.
	Impute values for missing baseline outcome. Include dummy for missing in 
		regression.
	Include sharpened q-values calculated above.*/
qui use "$temp/headteachertemp.dta", clear

local Y "ht_pedagogy_active_index_el goalteach_shld_skill_el"
local Y0 "ht_pedagogy_active_index_bl skillbasedsupport_bl"

* prep missing values for head teachers without baseline outcome
foreach y0 in `Y0' {
	qui gen `y0'i=`y0'
	qui replace `y0'i=r(mean) if `y0'==.	
	qui su `y0' if treatment==0
	qui gen `y0'm=(`y0'==.)
	lab var `y0'i "`y0', imputing control mean for missing values"
	lab var `y0'm "missing value for `y0'"
}

* regressions: Table P2, columns 6-7

/*Column 6*/
qui ivregress 2sls ht_pedagogy_active_index_el (exchange_pct=treatment) ht_pedagogy_active_index_bli ht_pedagogy_active_index_blm i.strata, robust
qui estat first
matrix define X=r(singleresults)
scalar define F=X[1,4]
qui su ht_pedagogy_active_index_el if treatment==0 & e(sample)
scalar define mu_c=r(mean)
qui su ht_pedagogy_active_index_bl if e(sample)
scalar define mu_0=r(mean)
scalar define qv=H1_c`i'q
outreg2 exchange_pct using "$results/rwanda_jde_tableA8.xls", se excel nolabel nocons addstat(`stat') append
local i=`i'+1

/*Column 7*/
qui ivregress 2sls goalteach_shld_skill_el (exchange_pct=treatment) skillbasedsupport_bli skillbasedsupport_blm i.strata, robust
qui estat first
matrix define X=r(singleresults)
scalar define F=X[1,4]
qui su goalteach_shld_skill_el if treatment==0 & e(sample)
scalar define mu_c=r(mean)
qui su skillbasedsupport_bl if e(sample)
scalar define mu_0=r(mean)
scalar define qv=H1_c`i'q
outreg2 exchange_pct using "$results/rwanda_jde_tableA8.xls", se excel nolabel nocons addstat(`stat') append


erase "$temp/teachertemp.dta"
erase "$temp/headteachertemp.dta"
local end=`"$S_TIME"' 
di "`start'"
di "`end'"
log close
